org 100h   ; DOS .COM image; assume entry state ax=bx=0 si=0x100 di=sp=-2

  mov al,0x13  ; with ah=0 (assumed) ax=0x0013: request "set video mode 0x13"

;Palette: 8 color gradients [21$]
;bx=index dh=r ch=g cl=b
;The single int 0x10 at P does double duty: on the first pass ax=0x0013
;(mode set), on every later pass ax=0x1010 (set one palette register).
;A colour computed in one pass is therefore submitted at the top of the
;next pass, after bx has already been decremented.
;NOTE(review): cwd zeroes dh only while bit 15 of ax is clear, so this relies
;on int 0x10 not returning a negative ax - confirm for the target BIOS.
P int 0x10     ; first pass: set 320x200 mode; later passes: set palette entry bx
  cwd           ; dx = sign extension of ax -> dh (red) = 0
  mov cl,bl
  shr cl,1     ; cl = i>>1 (i = bl, low byte of the index)
  mov ch,cl
  shr ch,1     ; ch = green = i>>2; CF = bit 1 of i
  jc Q         ; CF=1: keep red=0, blue=cl=i>>1
  xchg dh,cl   ; CF=0: red=i>>1, blue=0 (the old dh)
Q mov ax,0x1010; set palette index (function code for the next int 0x10)
  dec bx
  jnz P        ; bx starts at 0 (assumed), so the loop ends when bx wraps back to 0

  mov si,0x4000 ; 0x4000 bytes = a quarter of the 65536-byte (16384 x 4) table
  mov gs,si    ; si=gs=0x4000: table segment, sin->cos phase

  push 0xa000 - 160/16 ; sp=-4; the pushed 0x9ff6 is popped into es after the loop

;Sine table: 16384 float32 entries (-1..1)
;The word at [di] (di=-2) is the loop counter t. It advances by 4 per pass
;and the loop ends when it wraps back to 0, i.e. after 16384 entries.
;cos(t*2pi/65536) is stored at gs:(t+0x4000), so afterwards
;gs:[x+0x4000]=cos(x) and gs:[x]=sin(x), x in units of 1/65536 turn.
  fninit       ; reset the FPU; [di]=0 is assumed here
S mov bp,[di]  ; bp = t
  fild word[di]             ;| t (loaded as a signed 16-bit integer)
  fidiv word[c65536div2pi]  ;| T=t/65536*2pi
  fcos                      ;| cosT
  fstp dword[gs:bp+si]      ;| to gs:[bp+0x4000]
  sub [di],sp  ; t += 4 (sp=-4)
  jnz S        ; leaves with [di]=0 again and bp=-4 (the last t)

  pop es       ; es=0x9ff6: centered screen segment (0xa000 minus 160 bytes); sp=-2

M:

;For each pixel: find dX,dY,dZ and initialize X,Y,Z
;bx=time di=pixel_address si=0x4000
;0xcccd/65536 is about 0.8 = 256/320, so after the mul dx ~ di*256/320:
;dh ~ pixel row, dl ~ position inside the row scaled to 0..255.
;The ray direction is not stored in separate variables: pusha drops the
;registers on the stack and dX,dY,dZ are read back later as three
;overlapping 16-bit words (see the layout below, which assumes sp=-2).
X mov ax,0xcccd
  mul di       ; dx:ax = di*0xcccd
  mov cl,112   ; cl:dh=dZ=0x70??
  sub dh,cl    ; dh:dl=dY (centered)
  xchg ax,bx   ; dl:bh=dX ax=time
  pusha        ; -10 -9 -8 -7 -6 -5 -4 -3
               ;  bl bh dl dh cl ch al ah
               ;     ( dX )      0  (time)
               ;        ( dY )
               ;           ( dZ )

;Ray origin: (X,Y,Z) = (0x4000, 0, time)
  xchg ax,bx   ; bx=Z=time
  mov ax,si    ; ax=X=0x4000
  cwd          ; dx=Y=0

; Load dX,dY,dZ and rotate dX,dZ by time
; L runs four times. si flips between 0x4000 and 0 on every pass (cos or sin
; half of the table lookup at gs:[time+si]); bp flips between -4 and -2 on
; every second pass (selects the dX or dZ word on the stack).
                     ;  si=0x4000 si=0      si=0x4000 si=0
                     ;  bp=-4     bp=-4     bp=-2     bp=-2
L fild word[bp-5]    ;| dX      | dX      | dZ      | dZ
  fmul dword[gs:bx+si];|dX*cosT | dX*sinT | dZ*cosT | dZ*sinT
  xor si,ax          ; ax=0x4000: toggle si 0x4000 <-> 0
  jz L               ; si just became 0: repeat with the sin entry
  xor bp,2           ; toggle bp -4 <-> -2; PF reflects the low byte of bp
  jpo L              ;| dZ*s dZ*c dX*s dX*c ; si=0x4000 bp=-4
                     ; (0xfe has odd parity -> loop, 0xfc even -> fall through)
  fsubp st3,st0      ;| dZ*c dX*s dX*c-dZ*s
  faddp              ;| dX=dZ*c+dX*s dZ=dX*c-dZ*s
  fild word[bp-4]    ;| dY dX dZ

  shl bx,3     ;Z=time*8

Z:
;Compute the distance to the gyroid
;G runs three times. The two xchg rotate (ax,dx,bx) by one position per pass,
;so every coordinate is used once as a cos argument (gs:[bx+si], si=0x4000)
;and once as a sin argument (gs:[bx]). After the third pass the registers
;are back to ax=X dx=Y bx=Z. bp counts -4 -> -1; parity of its low byte is
;the loop condition (0xfd and 0xfe odd -> loop, 0xff even -> exit).
G fld dword[gs:bx+si]
  xchg ax,dx   ;| cosZ | cosY | cosX
  xchg ax,bx   ; ax=X dx=Y bx=Z -> ax=Z dx=X bx=Y
  fmul dword[gs:bx]
  inc bp       ;| cosZ*sinY | cosY*sinX | cosX*sinZ
  jpo G        ; bp=-1
  faddp
  faddp        ;| d=cosZ*sinY+cosY*sinX+cosX*sinZ
  fist word[bp+si]; store d rounded to an integer at [si-1] (bp=-1); its low byte is added to the colour at E
  fabs         ;| |d|
  fldl2e       ;| k=1.442695 |d|
  fsubrp st1,st0;| D=k-|d|
;  fsubr dword[cOffset]
;  fmul dword[cDistFactor]

;Advance ray by distance
;A runs three times, once per axis, in the order Y, X, Z. Each pass moves the
;front direction component to the back of the FPU stack (fst st4 writes the
;free register behind the four live ones, fistp then pops), so after three
;passes the stack is D dY dX dZ again. The integer registers rotate the same
;way and end as ax=X dx=Y bx=Z. bp counts -1 -> -4; parity of its low byte
;is the loop condition (0xfe and 0xfd odd -> loop, 0xfc even -> exit).
               ;| D dY dX dZ
A fxch st1     ;| dY D dX dZ
  fst st4      ;| dY D dX dZ dY
  fmul st1     ;| dY*D D dX dZ dY
  fistp dword[si] ; int32 scratch at [si] (si=0x4000) = dY*D, rounded
  xchg ax,dx   ; ax=X dx=Y bx=Z -> ax=Y dx=X bx=Z
  sar dword[si],2 ; arithmetic shift: divide the step by 4
  add ax,[si]  ; Y+=dY*D/4 | X+=dX*D/4 | Z+=dZ*D/4
  and al,0xfc  ; align to a multiple of 4 (for sine table)
  xchg ax,bx   ; ax=Y dx=X bx=Z -> ax=Z dx=X bx=Y
  dec bp
  jpo A        ; bp=-4

;Close enough?
;Only the top byte of the float32 image of D is inspected. The compare is
;signed, so a negative D (sign bit set) also counts as a hit.
  fstp dword[si]; store D as float32 (pops D: stack is now dY dX dZ)
  cmp byte[si+3],0x3e
  jl E         ; hit if D<0.125 (= bits(D)<0x3e000000)
  add cx,bp    ; cx -= 4 (bp=-4); cx started at 112 (cl=112, ch=0)
  jnz Z        ; max 28 iterations

;Draw pixel
;Colour = remaining step budget (cl: 112 minus 4 per march step taken) plus
;the byte the distance code stored at [si-1].
E fcompp       ;| dZ  (used only to pop dY and dX; the compare result is ignored)
  fstp st0     ;|
  xchg ax,cx   ; al = cl = remaining budget
  add al,[si-1]
  stosb        ; write the pixel to es:di
  popa         ; restore the pusha state: di is back to this pixel's address, ax=time
  xchg ax,bx   ; bx=time again

;Next pixel
  inc di
  jnz X        ; until di wraps to 0

;Next frame
  inc bh       ; time++ (bh is the high byte, so bx advances by 256)
  in al,0x60   ; esc check
  dec al       ; zero only when the port value was 1
  jnz M
  ret          ; pops the word at sp (expected -2, left at 0 by the sine-table loop), i.e. jumps to offset 0

;Integer divisor for the sine-table fidiv: table index t -> angle in radians.
c65536div2pi: dw 10430 ; 65536/2pi (= 10430.38, truncated to a 16-bit integer)
;cDistFactor: dd 0.25   ; (0.75 (Lipchitz constant) / 44700 (avg dir length)) * 65536/2pi
;cOffset: dd 1.5


;; Palette test
;  push 0xa000
;  pop es
;  xor di,di
;  xor ax,ax
;Y stosb
;  inc al
;  jnz Y
;  add di,64
;  jns Y
;
;  xor ax,ax
;  int 0x16
;  ret

